from __future__ import division

import math
import operator
import sys
from collections import deque, namedtuple
from itertools import (starmap, imap, chain, izip, izip_longest, ifilter,
                       islice)

from core.type_checking import verify_type
from plyny.collections2.bigchain import retaining_bigchain, bigchain
from plyny.collections2.slice import convert_args
from plyny.presentation_table import Presentation
from stats_table import Statistics


# Todo:
# * assignments can cause us to lose iterator:
#   * e.g. d = d | ratio, if d is not unrolled
# * is_clean still necessary?


def check_indexes(f):
    """ Decorator: resolve the wrapped method's positional arguments
    through self._check_indexes() before invoking it. """
    def wrapper(*args, **kwargs):
        self = args[0]
        resolved = self._check_indexes(*args[1:])
        return f(self, *resolved, **kwargs)
    return wrapper


class inner_func(object):
    """ Picklable callable used by the parallel mappers: produces an empty
    copy of the incoming table and re-adds every row transformed by
    ``func``. """

    def __init__(self, func):
        self.func = func

    def __call__(self, table):
        out = table.empty_copy()
        for row in table:
            out.add(*self.func(row))
        return out


class _Base(object):
    def __init__(self, dataobj, *column_names):
        """ Base table over a streaming data source.

        dataobj -- an _IterableData producing/holding the rows.
        column_names -- optional column labels; must be unique and contain
        no spaces (they become namedtuple fields).  When omitted, labels are
        derived lazily from the data (see the column_names property).
        """
        if len(set(column_names)) != len(column_names):
            raise ValueError('Column names must be unique')

        verify_type(dataobj, _IterableData, 'dataobj')
        for col in column_names:
            if ' ' in col:
                raise ValueError('Column names cannot contain spaces')

        # Sorting is lazy: these three are consulted by __iter__.
        self._sort_column = None
        self._reverse = None
        self._comparator = None

#        if len(column_names) == 1 and isinstance(column_names, (tuple, list)):
#            column_names = column_names[0]

        self._column_names = column_names
        if self._column_names:
            self._width = len(self._column_names)
        else:
            # Unknown until the data reveals itself (see width()).
            self._width = None

        self._data = dataobj

    def unroll(self):
        """ Make sure all generators are exhausted. """

        for _row in self:
            pass

    def __len__(self):
        # Row count, delegated to the backing data object.
        return len(self._data)

    def rename(self, name, index=None):
        """ Rename the column at `index` (defaults to the only column);
        returns self for chaining. """
        if index is None:
            if self.width() != 1:
                raise ValueError('need index')
            index = 0

        names = list(self._column_names)
        names[index] = name
        self._column_names = tuple(names)
        return self

    def __iter__(self):
        """ Iterate rows, applying any pending sort_by() lazily. """
        if self._sort_column is not None:
            values = list(self._iter())
            dec = iter
            if self._reverse:
                # sorted() is stable; reversing afterwards keeps the pass
                # itself simple (Python 2 cmp-style sort below).
                dec = reversed
            return dec(sorted(values, lambda x, y:
                self._comparator(self._sort_column(x), self._sort_column(y))))

        return self._iter()

    def width(self):
        """ Number of columns, probing the data when not yet known. """
        if self._width is not None:
            return self._width

        if not self._data.is_empty():
            return self._data.width()

        # Empty data: fall back to the labels (may raise; see column_names).
        return len(self.column_names)

    @classmethod
    def left_outer_join(cls, *tables):
        # Keep a key only when the leftmost table has it; pad other misses.
        return cls._merge_operation(False, True, None, *tables)

    @classmethod
    def join_on(cls, join_key, *tables):
        # Inner join on an explicitly named key column.
        return cls._merge_operation(True, False, join_key, *tables)

    @classmethod
    def join(cls, *tables):
        # Inner join on the single column name common to all tables.
        return cls._merge_operation(True, False, None, *tables)

    @classmethod
    def outer_join(cls, *tables):
        # Keep every key seen in any table, padding misses with None.
        return cls._merge_operation(False, False, None, *tables)

    @classmethod
    def _merge_operation(cls, inner_join, left, join_key=None, *tables):
        """ Shared engine behind join/join_on/outer_join/left_outer_join.

        inner_join -- drop keys that are missing from any table.
        left -- (outer path only) keep a key only when the first table
        has it.
        join_key -- explicit key column; when None the single column name
        common to every table is used (ValueError when there is not
        exactly one).
        """
        if len(tables) == 1:
            # XXX should 1 table be accepted?
            return tables[0].copy()
        elif len(tables) < 1:
            raise ValueError('Need at least 1 tables')

        if join_key is None:
            join_key = set(tables[0].column_names)

            # make sure everyone has 1 common key
            for x in (set(x.column_names) for x in tables[1:]):
                join_key &= x

            if len(join_key) != 1:
                raise ValueError('Need a single common key')

            # grab that key ...
            join_key = list(join_key)[0]

        # create column name list combining all table's column names
        # (the join key itself is removed from each per-table list)
        column_names = []
        for t in tables:
            tc = list(t.column_names)
            del tc[t.column_names.index(join_key)]
            column_names.append(tc)

        # key -> row lookup, one per table
        indexes = [table.index(join_key) for table in tables]

        all_keys_in_order = []

        col_names = [join_key] + reduce(list.__add__, column_names)
        joined = tables[0]._create(*col_names)

        if inner_join:
            def _inner():
                # Drive from the first table; a key missing from any other
                # table drops the whole row.
                for row in tables[0]:
                    key = getattr(row, join_key)
                    values = list(row)
                    for ix in indexes[1:]:
                        value = ix.get(key, None)
                        if value is None:
                            values = None
                            break
                        for fieldname, v in zip(value._fields, value):
                            if fieldname != join_key:
                                values.append(v)

                    if values is not None:
                        yield values

            joined.extend(_inner())
            return joined

#                if key in valid:
#                    all_keys_in_order.append(key)
            # Get all keys in order
        columns = [table.columns(join_key) for table in tables]
        seen = set()
        add = seen.add
        append = all_keys_in_order.append
        for column in columns:
            for value in column:
                if value in seen:
                    continue
                add(value)
                append(value)

        def pad_null(v, cols):
            # A missing row contributes one None per column of its table.
            if v is None:
                return [None for x in xrange(len(cols))]
            else:
                return [getattr(v, k) for k in cols]

        def _inner():
            for k in all_keys_in_order:
                vals = []
                for i, (index, cols) in enumerate(zip(indexes, column_names)):
                    res = pad_null(index.get(k, None), cols)
                    # Left join: skip keys absent from the first table.
                    if left and i == 0 and len(filter(lambda x: x is not None, res)) == 0:
                        break
                    vals += res
                else:
                    yield [k] + vals

        joined.extend(_inner())
        return joined

    @classmethod
    def left_sorted_join(cls, key1, *datatables, **kw):
        """ Sorted left join on `key1` (see _sorted_join). """
        # was `'allow_dup_keys' in kw`, which treated an explicit
        # allow_dup_keys=False as enabled; honor the passed value.
        allow_dup_keys = kw.get('allow_dup_keys', False)
        return cls._sorted_join(key1, allow_dup_keys, True, True, *datatables)

    @classmethod
    def sorted_outer_join(cls, key1, *datatables, **kw):
        """ Sorted outer join on `key1` (see _sorted_join). """
        # was `'allow_dup_keys' in kw`, which treated an explicit
        # allow_dup_keys=False as enabled; honor the passed value.
        allow_dup_keys = kw.get('allow_dup_keys', False)
        return cls._sorted_join(key1, allow_dup_keys, True, False, *datatables)

    @classmethod
    def sorted_join(cls, key1, *datatables, **kw):
        """ Sorted inner join on `key1` (see _sorted_join). """
        # was `'allow_dup_keys' in kw`, which treated an explicit
        # allow_dup_keys=False as enabled; honor the passed value.
        allow_dup_keys = kw.get('allow_dup_keys', False)
        return cls._sorted_join(key1, allow_dup_keys, False, False, *datatables)

    @classmethod
    def _sorted_join(cls, key1, allow_dup_keys, outer, left, *datatables):
        """ Shared implementation of the sorted_* joins.

        Builds the merged label list (key1 appears once), optionally
        de-duplicating repeated labels with numeric suffixes, then streams
        the merged rows from __sorted_join.

        NOTE(review): `cls(*labels)` assumes the class constructor takes
        bare column names -- true for DataTable subclasses, not for _Base
        itself.
        """
        # Assumes the lists have the same order of key1 (though not necessarily
        # identical keys)

        labels = list(reduce(tuple.__add__, (tuple(x.column_names) for x in datatables)))
        matches = []
        for i, item in enumerate(labels):
            if item == key1:
                matches.append(i)

        # Keep only the first occurrence of the join key.
        for value in reversed(matches[1:]):
            del labels[value]

        if allow_dup_keys:
            from collections2.counter import Counter
            c = Counter()
            for item in labels:
                c.increment(item)

            seen = Counter()
            new_labels = []
            for label in labels:
                if c[label] > 1:
                    # e.g. two 'price' columns become price01 / price02
                    seen.increment(label)
                    label = '%s%02d' % (label, seen[label])
                new_labels.append(label)

            labels = new_labels

        return cls(*labels).extend(DataTable.__sorted_join(key1, outer, left, *datatables))

    @check_indexes
    def minus(self, *indexes):
        """ Table with the given columns removed (all others, in order). """
        dropped = set(indexes)
        keep = [i for i in self._check_indexes() if i not in dropped]
        return self.narrow(*sorted(keep))

    @staticmethod
    def __sorted_join(key1, outer, left, *datatables):
        """ Merge-join generator over tables pre-sorted by `key1`.

        Repeatedly advances whichever inputs sit at the current minimum
        key and yields [key, combined values].  `outer` pads missing
        inputs with Nones; `left` additionally requires the first input
        to hold the key.
        """
        iterables = [iter(x) for x in datatables]

        valid_rows = set(range(len(datatables)))
        import new
        # Unique sentinel meaning "this input must be advanced".
        needs_more = new.classobj('NEEDS_MORE', (object,), {})

        row = [needs_more for x in datatables]
        cols = [x.column_names for x in datatables]

        def pad_null(cols):
            return [None for x in xrange(len(cols) - 1)]  # 1 less to remove common key

        while valid_rows:
            to_delete = []
            for i in valid_rows:
                d = iterables[i]
                if row[i] is needs_more:
                    try:
                        row[i] = d.next()
                    except StopIteration:
                        to_delete.append(i)

            if to_delete and not outer:
                # inner join: the first exhausted input ends the merge
                break

            for value in to_delete:
                valid_rows = valid_rows - set([value])

            if not valid_rows:
                break

            filtered = [x for x in row if x is not needs_more]
            assert len(filtered) != 0

            # Smallest key among the current heads is emitted next.
            values = [getattr(x, key1) for x in filtered]
            m = min(values)

            new_row = []
            left_valid = False
            for i, item in enumerate(row):
                if i not in valid_rows:
                    item = None

                invalid = item is None or getattr(item, key1) != m
                if i == 0:
                    left_valid = not invalid

                if invalid:
                    if outer and (not left or left_valid):
                        new_row += pad_null(cols[i])
                    else:
                        # non-outer (or left-miss): suppress this key entirely
                        new_row = None
                else:
                    values = []
                    for fieldname, v in zip(item._fields, item):
                        if fieldname != key1:
                            values.append(v)
                    if new_row is not None:
                        new_row += values
                    # consumed: advance this input on the next pass
                    row[i] = needs_more

            if new_row is not None:
                yield [m] + new_row

    def nested_index(self, *keys):
        """ Nested dict keyed by each column in `keys`, with rows as leaves.

        e.g. nested_index('a', 'b') -> {a_val: {b_val: row}}.  Later rows
        overwrite earlier ones at the same key path.
        """
        asdict = {}
        for key in keys:
            if key is None or key not in self.column_names:
                # was `% v` -- NameError; report the offending key instead
                raise ValueError('%s not a column' % key)

        for value in self:
            ad = asdict
            for key in keys[:-1]:
                ad = ad.setdefault(getattr(value, key), {})
            ad[getattr(value, keys[-1])] = value

        return asdict

    def index(self, key, value=None):
        """ Dict mapping each row's `key` column to the row itself, or to
        the row's `value` column when given.  Later rows overwrite earlier
        ones. """
        for name in (key, value):
            if name is not None and name not in self.column_names:
                raise ValueError('%s not a column' % name)

        mapping = {}
        for row in self:
            if value is None:
                mapping[getattr(row, key)] = row
            else:
                mapping[getattr(row, key)] = getattr(row, value)
        return mapping

    def _indexes(self, *indexes):
        """ Resolve column references (ints, digit strings or labels) to a
        list of unique, non-negative column positions, order preserved. """
        seen = set()
        new_indexes = []
        for i in indexes:
            if isinstance(i, basestring) and i.isdigit():
                i = int(i)

            if isinstance(i, int):
                val = i
                # XXX wraps out-of-range indexes instead of raising
                if val > self.width():
                    val %= self.width()
            else:
                try:
                    val = self.column_names.index(i)
                except ValueError:
                    raise ValueError('%s not valid column label' % i)

            if val not in seen:
                new_indexes.append(val)
                seen.add(val)
#
#        if len(new_indexes) == 1:
#            return new_indexes[0]

        # Translate negatives (e.g. -1 -> last column).
        new_new_indexes = []
        for index in new_indexes:
            if index < 0:
                index = len(self.column_names) + index
            new_new_indexes.append(index)

        return new_new_indexes

    def sort_by(self, col, reverse=False, comparator=cmp):
        """ Sort. Manifests at iteration time i.e., does not sort until
        iteration occurs."""

        # XXX doesn't work on single column

        if not isinstance(col, int):
            col = self._indexes(col)[0]
        if self.width() == 1:
            # Single-column rows are bare values, not tuples.
            self._sort_column = lambda x: x
        else:
            self._sort_column = operator.itemgetter(col)

        self._reverse = reverse
        self._comparator = comparator
        return self

    def shuffle(self):
        """ Randomize order. Occurs at call time. """
        # requires list mode

        import random
        rows = list(self)
        random.shuffle(rows)
        return self.replace(rows)

    def empty_copy(self):
        """ New empty :class:`DataTable` sharing this table's column
        names. """

        names = self.column_names
        return self._create(*names)

    def copy(self):
        """ Materialized copy of this :class:DataTable. """
        return self.empty_copy().extend(list(self))

    def rank(self, column, new_column_name='n'):
        """ Append a dense-rank column computed from `column`.

        NOTE(review): rank values are generated in sorted order while the
        table keeps its original row order, so the new column only lines up
        row-for-row when the table is already sorted by `column` -- confirm
        intent with callers.
        """
        d = self._create(new_column_name)

        last = None
        rank = -1
        for v in sorted(self.narrow(column)):
            if last is None or last != v:
                rank += 1
            d.add(rank)
            last = v

        return self | d

    def enumerate(self, new_column_name='n'):
        """ Append a 0-based row-number column named `new_column_name`. """
        counter = self._create(new_column_name)
        counter.extend(xrange(len(self)))
        return self | counter

    def extend(self, other):
        """ Add all values from other. Returns self for chaining. """

        if other is self:
            raise ValueError('can\'t extend self')

        self._data.extend(other)
        return self

    def replace(self, other):
        """ New table with the same columns, rows taken from `other`. """
        fresh = self.empty_copy()
        return fresh.extend(other)

    @classmethod
    def connect(cls, *tables):
        """ Tape all tables together end to end.

        NOTE(review): with zero or one table the tuple itself is returned
        as-is (not the single table) -- looks suspicious but kept for
        compatibility.
        """
        if len(tables) <= 1:
            return tables

        # was `reduce(t[0].tape, t)`: NameError (`t` never defined), and a
        # bound method cannot take reduce's two arguments anyway.
        return reduce(lambda acc, nxt: acc.tape(nxt), tables)

    def fuse(self, other):
        # XXX same thing as extend?
        """ Create :class:DataTable that is the two :class:DataTable's in
        order. Column names must align. """

        common = [name for name in self.column_names
                  if name in other.column_names]

        dt = self._create(*common)
        dt.extend(self.narrow(*common))
        dt.extend(other.narrow(*common))
        return dt

    def tape(self, other):
        # Row-wise concatenation (delegates to __add__).
        return self + other

    def modify(self, fun, name=None):
        """ Lazily map `fun` over whole rows into a new single-column
        DataTable (optionally named `name`). """
        if name is not None:
            d = DataTable(name)
        else:
            d = DataTable()

        d.extend(imap(fun, self))
        # Pre-seed the stream's length; the imap above is lazy, so len()
        # would otherwise force iteration.
        d._data._length = len(self)
        return d

    def modify_column(self, fun, index):
        # Deliberately disabled: everything past the raise is dead code
        # kept as a sketch.
        raise NotImplementedError('not tested')
        def _inner(values):
            for v in self:
                row = v[:]
                row[index] = fun(row)
                yield row

        return self.replace(_inner())

    def filter(self, fun):
        """ New table keeping only rows for which fun(row) is true. """
        return self.replace(row for row in self if fun(row))

    def filtersplit(self, fun):
        """ Partition into two tables by fun(row) -- ordering of the pair
        follows core.itertools2.filtersplit. """
        from core.itertools2 import filtersplit
        return [self.replace(x) for x in filtersplit(fun, self)]

    def group(self, key, assume_ordered=False):
        """ Group rows by the `key` column.

        Generator of (key value, :class:DataTable) pairs in first-seen
        order (despite the former docstring, no dict is returned).  With
        assume_ordered=True only adjacent runs are grouped, in one
        streaming pass; a None key would confuse the run tracking in that
        mode.
        """
        values = {}
        from collections2.ordereddict import OrderedSet
        o = OrderedSet()
        last = None
        last_group = None
        for value in self:
            v = getattr(value, key)
            if assume_ordered:
                if v != last:
                    if last is not None:
                        # key changed: emit the finished run
                        yield last, last_group
                        del last_group

                    last_group = self.empty_copy()

                last = v
                last_group.add(*value)
            else:
                o.add(v)
                values.setdefault(v, self._create(*self.column_names)).add(*value)

        if not assume_ordered:
            for item in o:
                yield (item, values[item])
        else:
            if last_group:
                yield last, last_group

    def _check_indexes(self, *indexes):
        # No arguments means "every column".
        if not indexes:
            indexes = xrange(self.width())

        return self._indexes(*indexes)

    def ordinalize(self, *indexes):
        """ Replace values with ordinals (per core.itertools2.ordinalize),
        preserving the selected column names. """
        from plyny.core.itertools2 import ordinalize

        dt = self._create(*self.get_column_names(*indexes))
        l = self.narrow(*indexes)

        if dt.width() == 1:
            # Single-column rows are bare values; ordinalize wants tuples.
            l = [(x,) for x in l]
        else:
            l = list(l)

        o = ordinalize(l)
        if dt.width() == 1:
            # ... and unwrap again on the way out.
            o = (i[0] for i in o)

        dt.extend(o)
        return dt

    def normalize2(self, *indexes):
        """ Divide each selected column by its sum; None values pass
        through unchanged. """
        cols = self.columns(*indexes)

        sums = self.sum(*indexes)
        new_cols = []

        def div(a, b):
            if a is None:
                return None
            return a / b

        for col, s in izip(cols, sums):
            # Bind s per-iteration: modify() maps lazily, so the old plain
            # closure saw only the LAST column's sum once iteration finally
            # happened (classic late-binding bug).
            new_cols.append(col.modify(lambda x, _s=s: div(x, _s)))

        return reduce(self.__class__.__or__, new_cols)

    def normalize(self, *indexes):
        """ Rescale each selected column to [0, 1] via (v - min)/(max - min).

        Constant or non-numeric columns pass through unchanged; None values
        are preserved.
        """
        new_cols = []
        cols = self.columns(*indexes)
        if not isinstance(cols, list):
            # columns() unwraps a single column; re-wrap for the loop.
            cols = [cols]

        for col in cols:
            mn, mx = col.minmax()
            dt = col.empty_copy()

            try:
                diff = mx - mn
            except TypeError:
                # non-numeric column: treat as constant (copied through)
                diff = 0

            if diff == 0:
                dt.extend(col)
            else:
                for v in col:
                    if v is None:
                        dt.add(None)
                        continue
                    try:
                        dt.add((v - mn) / (mx - mn))
                    except TypeError:
                        # mixed types mid-column: fall back to a plain copy
                        dt = col.empty_copy()
                        dt.extend(col)
                        break

            new_cols.append(dt)

        for i in xrange(len(new_cols) - 1):
            if len(new_cols[i]) != len(new_cols[i + 1]):
                raise ValueError('Something went wrong, cols not equal length')
        return reduce(self.__class__.__or__, new_cols)

    ##############
#---# Operators  #------------------------------------------------------------#
    ##############

    def __neg__(self):
        """ Elementwise negation over the cleaned rows. """
        def negated():
            for row in self.clean():
                if isinstance(row, (list, tuple)):
                    vals = [-v for v in row]
                else:
                    vals = [-row]

                # XXX single columns are unwrapped back to bare values
                yield vals[0] if len(vals) == 1 else vals

        return self.replace(negated())

    def __div__(self, other):
        # Elementwise division of two single-column tables (Python 2 /).
        return self._operation(other, operator.__div__)

    def __truediv__(self, other):
        # Elementwise true division of two single-column tables.
        return self._operation(other, operator.__truediv__)

    def __sub__(self, other):
        # Elementwise subtraction of two single-column tables.
        return self._operation(other, operator.__sub__)

    def __add__(self, other):
        # Row concatenation, NOT elementwise addition (see history below).
        return self.copy().extend(other)
#        return self._operation(other, operator.__add__)

    @check_indexes
    def unzip(self, *indexes):
        """ Split into (remaining-columns, selected-columns) tables. """
        chosen = set(indexes)
        rest = [i for i in xrange(len(self.column_names))
                if i not in chosen]
        return self.narrow(*rest), self.narrow(*indexes)

    def __and__(self, other):
        # Inner join on the single common column.
        return self.__class__.join(self, other)

    def __or__(self, other):
        """ Column-wise concatenation: zip the two tables' rows side by
        side (truncates to the shorter). """
        combined = self._create(*self.column_names + other.column_names)

        def paired():
            for left, right in izip(self, other):
                left = left if isinstance(left, tuple) else (left,)
                right = right if isinstance(right, tuple) else (right,)
                yield left + right

        return combined.extend(paired())

    def __mul__(self, other):
        # Elementwise multiplication of two single-column tables.
        return self._operation(other, operator.__mul__)

    def _operation(self, other, _operation):
        """ Elementwise binary op over two single-column tables; a None in
        either operand yields None.  Result column is named 'AxB'. """
        dt = self._create('%sx%s' % (self.column_names[0], other.column_names[0]))
        def _inner():
            try:
                for item in izip(self, other):
                    if item[0] is None or item[1] is None:
                        yield None
                    else:
                        yield _operation(item[0],  item[1])
            except TypeError:
                # Multi-column rows land here (tuples reject the operator).
                raise TypeError('Column mismatch')

        return dt.extend(_inner())

    def minmax(self, *indexes):
        """ (min, max) per column, computed in a single pass. """
        def _minmax(values):
            lo = hi = None
            for v in values:
                if lo is None:
                    lo = hi = v
                else:
                    if v < lo:
                        lo = v
                    if v > hi:
                        hi = v
            return lo, hi

        return self._summarize(_minmax, *indexes)

    def min(self, *indexes):
        """ Minimum per column (None for an empty column). """
        def _inner(values):
            return min(values) if values else None

        return self._summarize(_inner, *indexes)

    def max(self, *indexes):
        """ Maximum per column (None for an empty column). """
        def _inner(values):
            return max(values) if values else None

        return self._summarize(_inner, *indexes)

    def sum(self, *indexes):
        """ Sum of columns, one per column.  Columns are cleaned. """

        # _summarize is supplied by the Statistics mixin.
        return self._summarize(sum, *indexes)

    ####################
#---# Slicing & dicing #------------------------------------------------------#
    ####################

#    def __getitem__(self, keys):
#        if isinstance(keys, tuple):
#            if len(keys) != 2:
#                raise ValueError('Only 2 dimensions supported (%d given)' % len(keys))
#            if isinstance(keys[0], slice)

    def pmap(self, func, n, split_into=True):
        # Like parallel_map, but using the project's lightweight pmap.
        from core.pmap import pmap
        return self._parallel_map(func, n, pmap, split_into)

    def parallel_map(self, func, n, split_into=True):
        """ Map `func` over row groups using `n` worker processes. """
        from multiprocessing import Pool
        pool = Pool(processes=n)
        try:
            # pool.map is eager, so all results exist before cleanup runs.
            return self._parallel_map(func, n, pool.map, split_into)
        finally:
            # the pool was previously leaked; reap the workers
            pool.close()
            pool.join()

    def _parallel_map(self, func, n, mapper, split_into=True):
        """ Run inner_func(func) over row groups via `mapper`, then stitch
        the resulting tables back together in order. """
        if split_into:
            # NOTE(review): true division is in effect, so len(self) / n is
            # a float group count -- split_into happens to tolerate it, but
            # confirm this is intended.
            groups = self.split_into(len(self) / n)
        else:
            groups = self.split_by(n)

#        groups = [x.list_of_lists() for x in groups]

        d = self.empty_copy()
        result = mapper(inner_func(func), groups)

        if not split_into:
            # split_by dealt rows round-robin; interleave to restore order.
            result = izip(*result)

        [d.extend(group) for group in result]

        return d

    def split_by(self, n):
        """ Deal rows round-robin into `n` lazily-filled tables. """
        d = [deque() for d in xrange(n)]
        itr = enumerate(self)

        def inner(i):
            # Pull from the shared iterator, buffering rows that belong to
            # other groups, until group i's deque has something for us.
            while True:
                if not d[i]:
                    while True:
                        x, val = itr.next()
                        x %= n
                        d[x].append(val)
                        if x == i:
                            break

                yield d[i].popleft()

        return [self.empty_copy().extend(inner(x)) for x in xrange(n)]

    def get_group(self, i, out_of):
        # XXX not efficient due to iterating through all values until i
        """ The i'th of `out_of` groups produced by split_into. """
        if not (0 <= i < out_of):
            raise ValueError('!(0 <= %d < %d)' % (i, out_of))

        for pos, grp in enumerate(self.split_into(out_of)):
            if pos == i:
                return grp

    def chunk(self, by):
        """ Yield successive tables of `by` rows (the last may be short). """
        batch = self.empty_copy()
        for n, row in enumerate(self):
            if n and n % by == 0:
                yield batch
                batch = self.empty_copy()

            batch.add(row)

        if batch:
            yield batch

    def split_into(self, n):
        """ Split into groups of n. """
        # NOTE(review): actually yields roughly n groups of len/n rows each;
        # integer truncation of `by` can produce an extra short group.

        if len(self) < n:
            raise ValueError('%d > len(self) (%d) ' % (n, len(self)))

        by = int(float(len(self)) / n)

        cur = None
        for i, value in enumerate(self):
            if i % by == 0:
                if cur is not None:
                    yield cur
                cur = self.empty_copy()
            if not isinstance(value, tuple):
                value = [value]
            cur.add(*value)

        if cur is not None:
            yield cur

    @check_indexes
    def columns(self, *indexes):
        """ Return list of :class:DataTable's, one per column. """
        # One shared iterator feeds per-column deques, so each column table
        # can be consumed independently: rows pulled for one column are
        # buffered for the columns that lag behind.

        dts = [self._create(self.column_names[index]) for index in indexes]
        deques = [deque() for i in xrange(len(indexes))]
        itr = iter(self.narrow(*indexes))

        def _get(i):
            while True:
                if not deques[i]:
                    val = next(itr)
                    if isinstance(val, tuple):
                        for j, value in enumerate(val):
                            deques[j].append(value)
                    else:
                        # bare value: only possible with a single column
                        assert len(deques) == 1, deques
                        deques[i].append(val)

                yield deques[i].popleft()

        for i in xrange(len(indexes)):
            dts[i].extend(_get(i))

        # a single column is unwrapped rather than returned as a 1-list
        if len(dts) == 1:
            return dts[0]

        return dts

    def _x_columns(self, *indexes):
        """ Eager variant of columns(); returns a list of single-column
        tables (no unwrapping).  No in-file callers visible. """
        indexes = self._indexes(*indexes)
        cols = [self._create(self.column_names[index]) for index in indexes]

        wrap = None

        for v in self.narrow(*indexes):
            if wrap is None:
                wrap = isinstance(v, (list, tuple))

            if wrap:
                # narrow() already selected and ordered the columns, so rows
                # are positional here; the old code re-indexed with the
                # ORIGINAL table indexes (v[index]) and raised IndexError
                # for any index >= the narrowed width.
                for i, val in enumerate(v):
                    cols[i].add(val)
            else:
                cols[0].add(v)

        return cols

    def types(self):
        # Per-column type guesses, delegated to the data source.
        return self._data.types()

    def narrow_after(self, index):
        # All columns from `index` (inclusive) through the last.
        return self.narrow_range(index, self.width())

    @check_indexes
    def narrow_range(self, start, end):
        # NOTE(review): check_indexes resolves start/end like column refs,
        # which also makes negative bounds work; equal bounds would collapse
        # to a single argument -- confirm callers never pass start == end.
        return self.narrow(*xrange(start, end))

    @check_indexes
    def narrow(self, *indexes):
        """ Table containing only the selected columns, in the given
        order. """
        def _inner():
            for v in self:
                x = [v[i] for i in indexes]
                if len(x) == 1:
                    # single column: unwrap to a bare value
                    x = x[0]
                yield x

        if self.width() == 1:
            # NOTE(review): returns self, not a copy, unlike the wide case.
            return self

        # todo: handle when indexes == all

        d = self._create(*self.get_column_names(*indexes))
        d.extend(_inner())
        #d = self.__class__(*(self.column_names[c] for c in cols))

        return d

    def _create(self, *column_names):
        # Factory hook; subclasses override to supply extra ctor arguments.
        return self.__class__(*column_names)

    def __getslice__(self, *args):
        # Python 2 slicing protocol (t[a:b]) delegates to slice().
        return self.slice(*args)

    def slice(self, *args):
        # slice(start, stop[, step]) semantics, evaluated by the data store.
        return self.replace(self._data.slice(*args))

    def range(self, *args):
        """ Rows whose positions fall within slice(*args); scans from the
        start rather than seeking. """
        s = slice(*args)
        it = iter(xrange(s.start or 0, s.stop or len(self), s.step or 1))
        self_it = iter(self)
        def _inner():
            nexti = next(it)
            for i, element in enumerate(self_it):
                if i == nexti:
                    yield element
#                        dt.add(*element)
                    # exhausting `it` ends the generator (Py2 semantics)
                    nexti = next(it)

        return self.replace(_inner())

    def _itr_of_type(self, op):
        # Single-column rows are bare values; wrap them so `op` always
        # receives a sequence.
        if self.width() == 1:
            rows = ((v,) for v in self)
        else:
            rows = self

        return imap(op, rows)

    def _list_of_type(self, op):
        # Eager version of _itr_of_type.
        return list(self._itr_of_type(op))

    def as_lists(self):
        # Iterator of rows as lists (single columns become 1-lists).
        return self._itr_of_type(list)

    def list_of_lists(self):
        # Materialized rows as lists.
        return self._list_of_type(list)

    def list_of_tuples(self):
        # Materialized rows as tuples.
        return self._list_of_type(tuple)

    ####################
#---#                  #------------------------------------------------------#
    ####################

    def __repr__(self):
        # e.g. DataTable('a', 'b')
        return '%s%s' % (self.__class__.__name__, self.column_names)

    @check_indexes
    def get_column_names(self, *indexes):
        """ Column names at the given indexes (all when none given). """
        names = self.column_names
        return tuple(names[i] for i in indexes)

    @property
    def column_names(self):
        """ Names of columns. """
        # XXX make sure consistent in returning of list vs. tuple

        if not self._column_names:
            if self._data is None:
                raise ValueError('Empty datatable')

            if self._data.is_empty():
                raise ValueError('Does not have column names')

            # Derive labels from the data's width and cache them.
            from util.labels import labels
            self._column_names = tuple(labels(self._data.width()))
            self._width = len(self._column_names)

        return tuple(self._column_names)

    #####################
#---# Utility functions #-----------------------------------------------------#
    #####################

    @check_indexes
    def clean(self, *indexes):
        """ Create a :class:DataTable in which all row values are not None. """
        # A row survives only if every selected column is neither None
        # nor NaN.

        def _inner():
            for val in self:
                test = val
                if not isinstance(val, (tuple, list)):
                    test = [val]

                for i, v in enumerate(test):
                    if i not in indexes:
                        continue

                    if v is None:
                        break
                    if isinstance(v, float) and math.isnan(v):
                        break
                else:
                    # no break: the row is clean
                    yield val

#        dt._is_clean = True
        return self.replace(_inner())

    def __eq__(self, other):
        """ Row-wise equality; tables of different lengths are unequal. """
        # izip alone stopped at the shorter table, declaring a strict
        # prefix "equal"; pad the shorter side with a unique sentinel so a
        # length mismatch always compares unequal.
        pad = object()
        return not any(a != b for a, b in izip_longest(self, other, fillvalue=pad))

    #####################
#---# Modifiers         #-----------------------------------------------------#
    #####################

    def add_table(self, tbl):
        """ Add every row of `tbl` to this table. """
        # was map() used purely for side effects, which also built a
        # throwaway list of return values
        for row in tbl:
            self.add(row)

    def add(self, *row):
        """ Add values. Returns self for chaining. """

        # NOTE(review): the old guard `self.width == 1` compared the bound
        # method object to 1 (always False), so only the len() test ever
        # fired; the dead comparison is dropped -- behavior unchanged.
        if len(row) == 1:
            # single value: store it bare, not as a 1-tuple
            row = row[0]

        self._data.append(row)
        return self

    def _iter(self):
        # XXX may want to catch errors and throw as format problem
        """ Raw iteration: bare values for single-column tables,
        namedtuples otherwise. """

        if self._data is None or self._data.is_empty():
            return iter([])

        if self._data.width() == 1:
            return iter(self._data)

        n = namedtuple('row', ' '.join(self.column_names))
        return (n(*values) for values in self._data)

    def _stop_iteration(self):
        # NOTE(review): no in-file callers; presumably used as a callback
        # to terminate generator pipelines -- confirm before removing.
        raise StopIteration()


class _IterableData(object):
    """ Base for row sources: lazily discovers width and per-column type
    guesses by peeking at the first row.  Subclasses supply peek(),
    is_empty() and iteration. """

    def __init__(self):
        self._type_guesses = None
        self._width = None

    def width(self):
        # Values per row; requires at least one row to exist.
        if self.is_empty():
            raise ValueError('No width on empty data source')

        if self._width is None:
            self._get_meta()

        return self._width
#        return len(self._revealed[0])

    def _get_meta(self, values=None):
        """ Peek at one row (or use `values`) to cache width and type
        guesses; a non-sequence row means a single column. """
        if self._type_guesses is None:
            self._type_guesses = []
            if values is None:
                values = self.peek()

            if isinstance(values, (tuple, list)):
                for item in values:
                    guess = None
                    if isinstance(item, bool):
                        # bool must come first because it is also int
                        guess = bool
                    elif isinstance(item, int):
                        guess = int
                    elif isinstance(item, float):
                        guess = float
                    self._type_guesses.append(guess)

                self._width = len(values)
            else:
                self._width = 1

    def types(self):
        # Cached per-column guesses (None where undetermined).
        if self._type_guesses is None:
            self._get_meta()

        return self._type_guesses


class RetainingStream(retaining_bigchain, _IterableData):
    """ Data source that retains rows after iteration (re-iterable). """

    def __init__(self, *itr):
        retaining_bigchain.__init__(self, *itr)
        _IterableData.__init__(self)


class NonRetainingStream(bigchain, _IterableData):
    """ Single-pass data source: iterating a second time is an error. """

    def __init__(self, *itr):
        bigchain.__init__(self, *itr)
        _IterableData.__init__(self)
        self._has_iterated = False

    def __iter__(self):
        # Guard against silently iterating a stream that has been drained.
        if self._has_iterated:
            raise ValueError('Already iterated')
        self._has_iterated = True
        return bigchain.__iter__(self)


class DataTable(_Base, Statistics, Presentation):
    """ Default table: rows are retained, so it is freely re-iterable. """

    def __init__(self, *column_names):
        super(DataTable, self).__init__(RetainingStream(), *column_names)


class KnownLengthStreamingTable(DataTable):
    """ Single-pass table whose length is declared up front. """

    def __init__(self, length, *column_names):
        # Bypasses DataTable.__init__ to install a non-retaining stream.
        _Base.__init__(self, NonRetainingStream(length), *column_names)

    def _create(self, *column_names):
        # Fresh instances start with a declared length of 0.
        return self.__class__(0, *column_names)

    def tape(self, other):
        d = self.empty_copy()
        d.extend(self)
        d.extend(other)
        # NOTE(review): modify() pokes `_data._length` while this sets
        # `.length` -- one of the two spellings is probably wrong; confirm
        # against the bigchain implementation.
        d._data.length = len(self) + len(other)
        return d

#    def add(self, *row):
#        super(KnownLengthStreamingTable, self).add(*row)
#        self._data.length += 1
#
#    def extend(self, other):
#        try:
#            l = len(other)
#        except NotImplementedError:
#            raise TypeError('can only extend with items of known length')
#
#        super(KnownLengthStreamingTable, self).extend(other)
#        self._data.length += l


class StreamStream(_IterableData):
    """ Several streams presented as one concatenated stream. """

    def __init__(self, *streams):
        """ Stream over streams. """
        # Bug fix: _IterableData.__init__ was never called, so
        # self._type_guesses stayed undefined and types() raised
        # AttributeError.  Call it first so _width below wins.
        _IterableData.__init__(self)
        self.streams = streams
        self._width = self.streams[0].width()

    def __iter__(self):
        return chain(*self.streams)

    def is_empty(self):
        return not any(len(x) > 0 for x in self.streams)

    def __len__(self):
        return sum(len(x) for x in self.streams)


class StreamStreamTable(DataTable):
    """ Read-only table over several concatenated streams, with slicing
    that seeks by whole streams instead of scanning every row. """

    def __init__(self, *streams):
        if len(streams) == 0:
            raise ValueError('len(streams) == 0')
        # super(DataTable, ...) deliberately skips DataTable.__init__ so
        # the RetainingStream it would install is replaced by StreamStream.
        super(DataTable, self).__init__(StreamStream(*streams),
                *streams[0].column_names)

    def get_group(self, i, out_of):
        """ The i'th of `out_of` equal slices. """
        if i >= out_of or i < 0:
            raise ValueError('!(0 <= %d < %d)' % (i, out_of))

        by = len(self) / out_of
        return self.slice(int(i * by), int((i + 1) * by))

    def slice(self, *args):
        """ Slice across stream boundaries: locate the first and last
        streams the slice touches, then tape together a partial first
        stream, any whole middle streams and a partial last stream. """
        s = slice(*args)
        start = s.start
        stop = s.stop
        startindex = None
        endindex = len(self._data.streams)

        # Walk the streams, converting the global start/stop offsets into
        # offsets local to the stream each one falls in.
        for i in xrange(len(self._data.streams)):
            lens = len(self._data.streams[i])
            if startindex is None:
                if lens > start:
                    startindex = i
                    if stop is None:
                        break
                else:
                    start -= lens

            if stop is not None:
                if lens > stop:
                    endindex = i
                    break

                stop -= lens

        # NOTE(review): stp/strt below are computed but never used.
        stp = stop
        if stp is None:
            stp = len(self)
        stp = min(stp, len(self))
        strt = start or 0

#        import pdb; pdb.set_trace()
        d = KnownLengthStreamingTable(0, *self.column_names)
        if startindex == endindex:
            d = d.tape(self._data.streams[startindex].slice(start, stop))
        else:
            d = d.tape(self._data.streams[startindex].slice(start, None))
            for i in xrange(startindex + 1, endindex):
                d = d.tape(self._data.streams[i])

            # NOTE(review): relies on Python 2's `None > 0 == False` when
            # the slice is open-ended.
            if stop > 0:
                d = d.tape(self._data.streams[endindex].slice(stop))
        return d


class StreamingTable(DataTable):
    """ Single-pass table: rows are discarded as they are consumed. """

    def __init__(self, *column_names):
        # Skips DataTable.__init__ to install a non-retaining stream.
        super(DataTable, self).__init__(NonRetainingStream(),
                *column_names)


if __name__ == '__main__':
    # Smoke test: build a 2-column table row by row and inspect the
    # lazily-inferred metadata.
    a = [(x, x + 1) for x in xrange(10)]
    d = DataTable()
    [d.add(*x) for x in a]
#    d.extend(a)
    print d.width()
    print d._data._type_guesses
